"""
requests模块：
    python中原生的一款基于网咯请求的模块
    功能强大 简单便捷 效率极高
    作用：模拟浏览器发请求
    如何使用：
        — 指定URL
        — 发起请求
        — 获取响应数据
        — 持久化存储
    response.encoding = 'utf-8'  # 解决乱码问题
"""
import requests
import re, csv

# Scrape the Douban Top 250 page and store the extracted fields in a CSV file.
# Steps: fetch the page, regex-match each list item, write one CSV row per match.

url = 'https://movie.douban.com/top250'
# Send a browser-like User-Agent header with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0'}
# A timeout keeps the script from hanging forever if the server never answers.
res = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of silently writing an empty CSV.
res.raise_for_status()
page_text = res.text

# Named groups, in the order they are written to the CSV: name, year, score, num.
# re.S lets '.' match newlines so that '.*?' can span multiple lines of HTML.
# 'num' is the content of the first bare <span> after the rating
# (presumably the number-of-ratings text; verify against the page markup).
obj = re.compile(r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>'
                 r'.*?<p class="">.*?<br>(?P<year>.*?)&nbsp'
                 r'.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
                 r'.*?<span>(?P<num>.*?)</span>', re.S)

page_iter = obj.finditer(page_text)
# newline='' is required by the csv module; without it, extra blank lines
# appear between rows on platforms that translate '\n' to '\r\n'.
# The with-statement guarantees the file is closed even if a row fails.
with open('豆瓣.csv', 'w', encoding='utf-8', newline='') as f:
    csvwriter = csv.writer(f)
    for i in page_iter:
        dic = i.groupdict()
        # The year capture is surrounded by whitespace in the HTML; trim it.
        dic['year'] = dic['year'].strip()
        csvwriter.writerow(dic.values())
print('爬取完毕!!!')